* Set the working directory; all relative paths below (e.g. ..\Ganong, ..\Kuziemko, "GA cleaned ...") resolve against it. Machine-specific absolute path: adjust when running elsewhere.
cd "C:\Users\David\Dropbox\Documents\Work\Clients & prospects\GiveWell\Criminal justice\Replications"

* derived from Ganong "episodes.do" and "INM-R2.do" at https://web.archive.org/web/20160125012736/http:/scholar.harvard.edu/files/ganong/files/crimedofiles.zip?m=1371693701
* convert the raw data into "GA cleaned <dataset>.dta" for analysis. Used here for both Ganong and Kuziemko
*
* CleanData, dataset(name)
*   dataset(): "Ganong" loads ..\Ganong\INM-R_no_pii; any other value loads ..\Kuziemko\inm-r-read-in.
*              The Ganong-specific branches below are taken only for the exact string "Ganong".
*   Renames the raw V### fields, builds date, sentence, demographic and recidivism variables,
*   then saves the result as "GA cleaned <dataset>" in Stata 12 format, replacing any existing file.
*   The data in memory are replaced.
cap program drop CleanData
program define CleanData
	syntax, dataset(string)
	use `"`=cond("`dataset'"=="Ganong", "..\Ganong\INM-R_no_pii", "..\Kuziemko\inm-r-read-in")'"', clear

	* Normalize date-part suffixes to upper case; captured, so it does nothing if the rename fails
	cap rename (*yr *mo *da) (*YR *MO *DA)

	rename	V1    recordkey
	rename  V533  ageatadmission
	rename	V1062 parguidebr1
	rename	V1063 parguidebr2
	rename	V1156 violentpersonal
	rename	V1158 nonviolentpersonal
	rename	V1160 property
	rename	V1162 drugsales
	rename	V1164 drugpossessions
	rename	V1166 alcohols
	rename	V1168 habitduis
	rename	V1170 sexoff
	rename	V1172 othercrimenumberofother
	rename	V1174 majoroff
	rename  V1176 majoroffensegroup
	rename  V1201 majoroffcounty
	rename  V1205 majoroffcircuit
	rename	V128 racecode
	rename	V13 priorrecordkey
	* These five fields are renamed directly for Ganong; for the other file they are strings and are encoded to numeric
	if "`dataset'"=="Ganong" {
		rename (V22 V127 V130 V131 V1216) (activeinmateflag sb441flag sexcode ethnicity currvioflag)
	}
	else {
		encode V22 , gen(activeinmateflag)
		replace V127=" " if V127!="N" & V127!="Y" // to match Ganong encoding
		encode V127, gen(sb441flag)
		replace V130="" if V130=="0"
		encode V130, gen(sexcode)
		gen byte ethnicity = V131=="Y" if inlist(V131, "N", "Y")
		replace V1216=" " if V1216!="N" & V1216!="Y"
		encode V1216, gen(currvioflag)
	}
	* NOTE(review): -encode- requires generate() and has no replace option, so this captured call appears to fail silently every time and leave ethnicity unchanged -- confirm intent
	cap encode ethnicity, replace
	rename	V1375 futureadmcode
	rename	V1377 futureoffcode
	rename	V1381 futureoffgroup
	rename	V1384 futurevioflag
	rename	V1385 futureparrevreason
	rename	V1387 futuredaystoreturn
	rename	V1525 gridyear
	rename	V1527 decisionnum
	rename	V1530 ageatfirstcommit
	rename	V1532 priorconvs
	rename	V1534 priorincars
	rename	V1536 successfactorfactoraage
	rename	V1537 successfactorbprio
	rename	V1538 successfactorcprior
	rename	V1539 successfactord
	rename	V1540 successfactoreheroinor
	rename	V1541 successfactorfcommitment
	rename	V1542 successfactorgfullyempl
	rename	V1543 successfactorhhadwrat
	rename	V1544 severitylevel
	rename	V1545 sucscore
	rename	V1547 sucgroup
	rename	V1548 parmonths
	rename	V1570 raterid
	rename  V1581 boardesttpm
	rename	V1589 adjguidemos
	rename	V1592 boarddecision
	rename	V1596 totalmostoserve
	rename	V1601 guidepriorprobrevoc
	rename	V1602 guidepriorparolerevoc
	rename	V1605 rtphit006mo
	rename	V1606 rtphit012mo
	rename	V1607 rtphit018mo
	rename	V1608 rtphit024mo
	rename	V1609 rtphit030mo
	rename	V1610 rtphit3
	rename	V1624 rtphit10
	rename	V1635 parconsidtype
	rename	V1639 ocdate
	rename	V1653 ocjudge
	rename	V1659 ocda
	rename	V1665 ocinmorprob
	rename	V19 recsinchain
	rename  V193 zipcode
	rename	V220 topofchainflag
	rename	V221 verdictpleaflag
	rename	V23 activeparoleflag
	rename	V24 firstoffenderstatus
	rename	V26 activeprobationflag
	rename	V27 firstconvictionever
	rename	V29 uno
	rename	V319 numpriorincars
	rename	V321 homecounty
	rename	V330 employmentstatus
	rename	V341 maritalstatus
	rename	V346 educationallevel
	rename	V348 oldwratsatscore
	rename	V351 wratreadscore
	rename	V354 wratmathscore
	rename	V357 wratspelscore
	rename	V360 iqscore
	rename	V39 inmatetype
	rename	V457 cumdiscipviolent
	rename	V460 cumdiscipnonviolent
	rename	V463 cumdisciptotal
	rename	V466 probfollowmos
	rename	V501 drugbehavprob
	rename	V502 alcbehavprob
	rename	V503 fambehavcode1
	rename	V505 fambehavcode2
	rename	V507 fambehavcode3
	rename	V509 fambehavcode4
	rename	V511 fambehavcode5
	rename	V528 ageatsentencing
	rename	V531 typeadmission
	rename	V540 probationfollows
	rename	V542 probablereltype
	rename	V558 sentlengthdays
	rename  V563 sentlengthyrs
	rename  V695 paremflag
	rename	V7 topchain
	rename	V714 pardischargereason
	rename	V777 jailcreditdays
	rename	V797 jailmaxoutstatus
	rename	V800 releasecode
	rename	V802 deathtype
	rename	V804 departurecodegroup
	rename	V805 ageatrelease
	rename	V812 servdaysprsnjail
	rename	V826 admneworrevnewcasever
	rename	V832 admnewprobpar
	rename	V833 parrevreason
	rename  V836_* curoff*

	* Ganong only: translate each curoff variable through the "curoff recoding" lookup file.
	* Each variable is temporarily renamed to the lookup key (curoff) and replaced by the lookup value (_curoff).
	if "`dataset'"=="Ganong" {
		foreach var of varlist curoff* {
			ren `var' curoff
			joinby curoff using "..\Ganong\curoff recoding", unmatched(master)
			drop _merge curoff
			ren _curoff `var'
		}
	}

	rename	V975 reconvchkptstilhit
	rename	V977 reconvinmorprob
	rename  V983 reconvoffensecode1
	rename  V978 reconvdate
	* NOTE(review): Stata's documented offset for Excel 1900-system serial dates is td(30dec1899); adding td(1jan1900) gives dates two days later if the source values are true Excel serials -- confirm against the raw data before changing
	replace reconvdate = cond(reconvdate, reconvdate + td(1jan1900), .) // convert from Excel to Stata date number
	rename V992  reconvarea3 // usually agrees with reconvchkptstilhit<=6, but not always
	rename V1006 reconvarea10
	rename V1356 probrevtech
	rename V1357 probrevconv
	rename V1358 probrevwoconv
	rename V1359 probrevsent
	rename V1360 probrevunknown
	rename V1361 probnewcourt
	rename V145 height
	rename V148 weight
	rename V513 mildischarge
	rename V646 mentalhealthhigh
	rename V647 mentalhealthrecent
	rename V1551 ballot

	* Assemble Stata daily dates from the separate month, day and year fields
	gen int filecreationdate = mdy(V311MO ,V311DA ,V311YR )
	gen int birthdate        = mdy(V135MO ,V135DA ,V135YR )
	gen int firstcontactdate = mdy(V1226MO,V1226DA,V1226YR)
	gen int crimedate        = mdy(V1234MO,V1234DA,V1234YR)
	gen int sentdate         = mdy(V1730MO,V1730DA,V1730YR)
	gen int sentbegindate    = mdy(V1258MO,V1258DA,V1258YR)
	gen int admitdate        = mdy(V1274MO,V1274DA,V1274YR)
	gen int futureadmdate    = mdy(V1420MO,V1420DA,V1420YR)
	gen int oldtentreldate   = mdy(V1466MO,V1466DA,V1466YR)
	gen int tentparoledate   = mdy(V1474MO,V1474DA,V1474YR)
	gen int actualreldate    = mdy(V1498MO,V1498DA,V1498YR)
	gen int maxreldate       = mdy(V1482MO,V1482DA,V1482YR)
	* V1573 is parsed as a YMD string; for datasets other than Kuziemko it is decoded to a string first
	if "`dataset'"=="Kuziemko" gen int ratedate = date(V1573, "YMD")
	else {
		decode V1573, gen(_V1573)
		gen int ratedate = date(_V1573, "YMD")
	}

	format %td *date
	* Discard all value labels carried in from the raw file (including those created by -encode- above); fresh ones are defined below
	label drop _all

	label values reconvoffensecode1 OTISCrime
	label define revokereason ///
		0  "Blank" ///
		1  "Waiver - Technical Violation(s)" ///
		2  "Waiver - New Offense(s)" ///
		3  "Hearing - Technical Violation(s)" ///
		4  "Hearing - New Offense(s)" ///
		5  "New Conviction" ///
		6  "Waiver - Misdemeanor  (not convicted," ///
		7  "Waiver - Felony  (not convicted, but" ///
		8  "Hearing - Misdemeanor (not convicted," ///
		9  "Hearing - Felony  (not convicted, but" ///
		10 "New Conviction - Misdemeanor" ///
		11 "New Conviction - Felony"
	label values parrevreason revokereason

	label define returnreason 1 "New Conviction" 2 "Probation Revoke" 3 "Parole Revoke"
	label values admnewprobpar returnreason

	label define alcbehavprob ///
		0 "No alcohol problem" ///
		1 "Alcoholic" ///
		2 "Alcohol Abuser"
	label values alcbehavprob alcbehavprob

	label define drugbehavprob ///
		0 "No drug problem" ///
		5 "Drug experimenter" ///
		6 "Drug abuser" ///
		7 "Narcotic addict"
	label values drugbehavprob drugbehavprob

	label define mentalhealth ///
		0 "No mental health eval" ///
		1 "Inmate OK" ///
		2 "Outpatient" ///
		3 "Supportive Living Unit (moderate)" ///
		4 "Supportive Living Unit (high)" ///
		5 "Crisis Stabilization" ///
		6 "In Binion Building"
	label values mentalhealthhigh mentalhealthrecent mentalhealth

	label define races 1 "White" 2 "Black" 3 "Native American" 4 "Error" 5 "Other" 6 "Asian" 7 "Error" 8 "Unclassified" 9 "Unknown"
	label values racecode races

	label define sexes 1 "Female" 2 "Male"
	label values sexcode sexes

	label define employmentstatuses ///
		01 "Employed Full Time" ///
		02 "Employed Part Time" ///
		03 "Unemployed For Less Then 6 Months" ///
		04 "Unemployed For 6 Months Or More" ///
		05 "Never Worked (Capable, Non-Student)" ///
		06 "Student" ///
		07 "Incapable Of Work" ///
		97 "Other" ///
		98 "Not Reported"
	label values employmentstatus employmentstatuses

	label var sucscore "Success Score"
	label var ageatrelease "Age at Release"
	label var departurecodegroup "Departure code"
	label var parconsidtype "Type of Parole Considered"

	ren (V1338 V1344 V1350) (parcrimecode1 parcrimecode2 parcrimecode3)
	ren (V1343 V1349 V1355) (parcrimesuffix1 parcrimesuffix2 parcrimesuffix3)
	ren (V1342 V1348 V1354) (parcrimeseverity1 parcrimeseverity2 parcrimeseverity3)
	if "`dataset'"=="Ganong" recode parcrimeseverity? (1=.) (2=0) (3=1) (4=2) (5=3) (6=4) (7=5) (8=6) (9=7) (10=8) // undo recoding
		else destring parcrime*, replace force
	* no label name given: detaches any value label from the severity variables
	label values parcrimeseverity?

	* For Kuziemko, compute average sentence for each conviction crime and map to reconviction crimes
	preserve
	gen avgSentLenMos = cond(sentlengthyrs>=1000, 100, sentlengthyrs/100*12)
	collapse avgSentLenMos, by(curoff1)
	ren curoff1 reconvoffensecode1
	tempfile temp
	save `temp'
	restore
	merge m:1 reconvoffensecode1 using `temp', keep(match master) nogen

	drop if admitdate == .

	*** Drop duplicates for admit and release dates.
	// DR note: no duplicates occur if one first restricts by validDates, defined below. But doing that here would change sample slightly. 
	duplicates tag uno admitdate, gen(tag1)

	duplicates tag uno actualreldate if !tag1, gen(tag2)
	list uno sentbegindate admitdate actualreldate curoff1 if tag2
	* duplicates are flagged rather than dropped; downstream code filters on this flag
	gen byte duplicateGanong = tag1 | tag2

	drop tag?
	cap drop V* _V*

	sort uno admitdate

	gen int year = year(ratedate)

	*sucgroup related
	recode sucscore (13/20 = 5) (11/12 = 4) (9/10 = 3) (6/8 = 2) (0/5 = 1), gen(sucgroup79)
	cap drop sucgroup
	recode sucscore (14/20 = 6) (13 = 5) (11/12 = 4) (9/10 = 3) (6/8 = 2) (0/5 = 1), gen(sucgroup)
	recode sucscore (14/20 = 3) (9/13 = 2) (0/8 = 1), gen(sucgroup93)

	*sentence-related
	gen float timeserv = servdaysprsnjail/365
	gen float sentlen = sentlengthdays/365
	gen sentLenMos = round(sentlengthyrs/100*12, 1) if sentlengthyrs<888888 & sentlengthyrs>4 // Kuziemko definition
	gen int cell = severitylevel*22 + sucscore
	gen int yearrel = year(actualreldate)
	gen int admityear = year(admitdate)
	gen byte maxout = inlist(departurecodegroup, 3, 4)

	*demographic-related
	gen int ageatfirstcontactNum = round((firstcontactdate - birthdate)/365)
	replace ageatfirstcontactNum = 16 if ageatfirstcontactNum < 16
	gen byte ageAtEntry = round(ageatrelease-timeserv)
	replace  ageAtEntry = 18 if ageAtEntry < 18

	* admission <= parole rating <= release <= next admission (a missing next admission passes, since missing sorts high)
	gen byte validDates = admitdate <= ratedate & ratedate <= actualreldate & actualreldate <= futureadmdate

	* recid stats
	gen int rtpdist =  futureadmdate - actualreldate
	gen byte felony     = admnewprobpar==1 | inlist(parrevreason, 2, 4, 5, 7, 9, 11)
	gen byte felonyConv = admnewprobpar==1 | inlist(parrevreason,       5,       11)
	
	* The [_n+1] references below read the same person's next admission, so the data must be sorted by uno admitdate
	sort uno admitdate
	foreach t in 3 10 {
		gen byte reconv`t' = reconvchkptstilhit >= 1 & reconvchkptstilhit <= 2*`t'
		gen byte rtp`t' = rtpdist <= 365*`t' if actualreldate < filecreationdate -  365*`t'
		gen byte rtpFelony`t'      = rtp`t' * (admnewprobpar[_n+1]==1 | inlist(parrevreason[_n+1], 2, 4, 5, 7, 9, 11)) * (uno==uno[_n+1]) // exclude technicals and misdemeanors
		gen byte rtpFelonyConv`t'  = rtp`t' * (admnewprobpar[_n+1]==1 | inlist(parrevreason[_n+1],       5,       11)) * (uno==uno[_n+1]) // also exclude felonies charges unless leading to conviction
		foreach var in "" Felony FelonyConv {
			gen byte rtp`var'Prob`t' = rtp`var'`t' * (admnewprobpar[_n+1]==2)
			gen byte rtp`var'Par`t'  = rtp`var'`t' * (admnewprobpar[_n+1]==3)
		}

		gen rtphitWt`t' = cond(reconvoffensecode1, avgSentLenMos, 0) if reconvarea`t'==1 // Kuziemko severity-weighted recidivism
		replace rtphitWt`t' = (sentLenMos - timeserv*12) if rtphitWt`t'==. & rtphit`t'==1
		replace rtphitWt`t' = 0 if reconvarea`t'==0 & rtphit`t'==0
		
		gen rtpFelonyWt`t'     = rtpFelony`t'     * rtphitWt`t'
		gen rtpFelonyConvWt`t' = rtpFelonyConv`t' * rtphitWt`t'
	}

	* Crime-category grouping of the lead offense (curoff1), defined for new convictions only.
	* The indicator terms must be mutually exclusive so that the weighted sum yields a single group code.
	* Fix: the exclusion list for group 4 used to start with 1032 (a typo for 1302, which lies outside 1300-1399 and so
	* excluded nothing); code 1302 therefore matched both group 3 and group 4 and was coded 3+4=7.
	gen byte CurrOffCostGroup  = ///
		1 * (curoff1>=1100 & curoff1<=1124) + ///
		2 * (curoff1>=2000 & curoff1<=2099) + ///
		3 * (inlist(curoff1, 1190, 1302, 1304, 1305, 1308, 1309, 1310, 1311, 1313, 1314, 1315, 1318, 1330, 1331)) + ///
		4 * (curoff1>=1300 & curoff1<=1399 & !inlist(curoff1, 1302, 1304, 1305, 1308, 1309, 1310, 1311, 1313, 1314, 1315, 1318, 1330, 1331)) + ///
		5 * (curoff1>=1900 & curoff1<=1999)  + ///
		6 * (curoff1>=1600 & curoff1<=1699) + ///
		7 * (curoff1>=1800 & curoff1<=1899 & curoff1!=1813) + ///
		8 * (curoff1==1813) + ///
		9 * (curoff1>=1400 & curoff1<=1499) + ///
		10* (curoff1>=1500 & curoff1<=1599) + ///
		11* (curoff1>=1700 & curoff1<=1799) + ///
		12* (curoff1==5005) + ///
		13* (curoff1>=4000 & curoff1<=4999) + ///
		14* ((curoff1>=2100 & !(curoff1==5005) & !(curoff1>=4000 & curoff1<=4999)) | (curoff1>1124 & curoff1<=1199 & curoff1!=1190)) ///
	if admnewprobpar==1 // new convictions only
	* parrevreason 7, 9, 11 map to groups 15, 16, 17
	replace CurrOffCostGroup = 14+(parrevreason-5)/2 if admnewprobpar!=1 & inlist(parrevreason, 7, 9, 11) // if triggering revocation, new felony not recorded in curoff1

	label define CurrOffCostGroups ///
		1  "Homicide" ///
		2  "Rape" ///
		3  "AssaultAgg" ///
		4  "AssaultSimp" ///
		5  "Robbery" ///
		6  "Burglary" ///
		7  "Larceny" ///
		8  "MVTheft" ///
		9  "Arson" ///
		10 "Vandalism" ///
		11 "Fraud" ///
		12 "DUI " ///
		13 "Drug" ///
		14 "Other" ///
		15 "RevocWaiv" ///
		16 "RevocHear" ///
		17 "RevocNewConv"
	* Fix: attach the value label just defined (this was -label var-, which only set the variable label to the literal text CurrOffCostGroups)
	label values CurrOffCostGroup CurrOffCostGroups
	
	* cluster id: one value per (sucgroup, severitylevel) pair when severitylevel runs 1-7
	gen int cellCluster  = (sucgroup-1) * 7 + severitylevel

	saveold "GA cleaned `dataset'", replace version(12)
end

* Parole guideline grids adopted in 1981, 1983, 1993
* Each matrix has 7 rows; grid81 has 20 columns, grid83 and grid93 have 21.
* Presumably rows are severity levels 1-7, columns are success scores, and entries are months -- TODO confirm.
* If the 21 columns are scores 0-20, the column breakpoints of grid83 (0-5, 6-8, 9-10, 11-12, 13-20) and
* grid93 (0-8, 9-13, 14-20) coincide with the sucgroup79 and sucgroup93 recodes in CleanData.
* NOTE(review): grid81 has one column fewer than the others; which score it omits cannot be told from here.
mat grid81 = ///
	 36,  36,  36,  36,  36, 24, 24, 24, 15, 15, 12, 12, 9, 9, 9, 9, 9, 9, 9, 9 \ ///
	 42,  42,  42,  42,  42, 27, 27, 27, 18, 18, 15, 15, 12, 12, 12, 12, 12, 12, 12, 12 \ ///
	 48,  48,  48,  48,  48, 30, 30, 30, 24, 24, 18, 18, 15, 15, 15, 15, 15, 15, 15, 15 \ ///
	 54,  54,  54,  54,  54, 36, 36, 36, 30, 30, 24, 24, 21, 21, 21, 21, 21, 21, 21, 21 \ ///
	 60,  60,  60,  60,  60, 42, 42, 42, 36, 36, 30, 30, 27, 27, 27, 27, 27, 27, 27, 27 \ ///
	 78,  78,  78,  78,  78, 66, 66, 66, 54, 54, 54, 54, 42, 42, 42, 42, 42, 42, 42, 42 \ ///
	102, 102, 102, 102, 102, 90, 90, 90, 84, 84, 84, 84, 78, 78, 78, 78, 78, 78, 78, 78

mat grid83 = ///
	 18,  18,  18,  18,  18,  18,  12, 12, 12,  8,  8,  6,  6,  4,  4,  4,  4,  4,  4,  4,  4  \  ///
	 21,  21,  21,  21,  21,  21,  14, 14, 14,  9,  9,  8,  8,  6,  6,  6,  6,  6,  6,  6,  6  \  ///
	 24,  24,  24,  24,  24,  24,  15, 15, 15, 12, 12,  9,  9,  8,  8,  8,  8,  8,  8,  8,  8  \  ///
	 27,  27,  27,  27,  27,  27,  18, 18, 18, 15, 15, 12, 12, 10, 10, 10, 10, 10, 10, 10, 10  \  ///
	 52,  52,  52,  52,  52,  52,  40, 40, 40, 30, 30, 25, 25, 20, 20, 20, 20, 20, 20, 20, 20  \  ///
	 78,  78,  78,  78,  78,  78,  60, 60, 60, 54, 54, 48, 48, 36, 36, 36, 36, 36, 36, 36, 36  \  ///
	102, 102, 102, 102, 102, 102,  90, 90, 90, 78, 78, 72, 72, 60, 60, 60, 60, 60, 60, 60, 60

mat grid93 = ///
	 22,  22,  22,  22,  22,  22,  22,  22,  22, 16, 16, 16, 16, 16, 10, 10, 10, 10, 10, 10, 10 \  ///
	 24,  24,  24,  24,  24,  24,  24,  24,  24, 18, 18, 18, 18, 18, 12, 12, 12, 12, 12, 12, 12 \  ///
	 26,  26,  26,  26,  26,  26,  26,  26,  26, 20, 20, 20, 20, 20, 14, 14, 14, 14, 14, 14, 14 \  ///
	 28,  28,  28,  28,  28,  28,  28,  28,  28, 22, 22, 22, 22, 22, 16, 16, 16, 16, 16, 16, 16 \  ///
	 52,  52,  52,  52,  52,  52,  52,  52,  52, 40, 40, 40, 40, 40, 34, 34, 34, 34, 34, 34, 34 \  ///
	 78,  78,  78,  78,  78,  78,  78,  78,  78, 62, 62, 62, 62, 62, 52, 52, 52, 52, 52, 52, 52 \  ///
	102, 102, 102, 102, 102, 102, 102, 102, 102, 84, 84, 84, 84, 84, 72, 72, 72, 72, 72, 72, 72


***
*** Tables in review of Ganong
***

* PrepGanong: load "GA cleaned Ganong" and build the sample flags and recoded controls used in the Ganong review.
*   First positional argument: grid revision date, written as a date literal accepted by td(), e.g. 1May1983.
*   Remaining arguments: passed through verbatim to -use- (callers supply an if clause).
*   Leaves the prepared data in memory; nothing is saved to disk.
cap program drop PrepGanong
program define PrepGanong // First arg should be grid revision date, followed by optional if clause
	local griddate `1'
	* drop the first argument so that the remaining arguments can be forwarded to -use-
	macro shift
	use "GA cleaned Ganong" `*', clear
	keep if ratedate<td(27jun2007) // after this date, sucscore also takes values in 20-97 range
	
	* 1 for parole decisions rated on or after the revision date
	gen byte after = ratedate>=td(`griddate')

	*look at observations considered for parole within a year of reform
	gen byte gridPeriod      = ratedate >= td(`griddate')-365      & ratedate < td(`griddate')+365
	gen byte gridPeriodLarge = ratedate >= td(`griddate')-365.25*4 & ratedate < td(`griddate')+365.25*4

	* mark later appearances if there are multiple
	* firstratedate is missing outside gridPeriod and for flagged duplicates, so obsIsFirstratedate is 0 there
	egen int firstratedate = min(ratedate) if gridPeriod & !duplicateGanong, by(uno)
	gen byte obsIsFirstratedate = firstratedate==ratedate
	*check that time served is 3 months to 10 years
	gen byte timeLowerBound = servdaysprsnjail >= 90
	gen byte timeUpperBound = servdaysprsnjail <= 10*365

	gen byte baseObserv      = gridPeriod      & validDates & timeLowerBound & timeUpperBound & obsIsFirstratedate
	gen byte baseObservLarge = gridPeriodLarge & validDates & timeLowerBound & timeUpperBound

	/*copy release date for base observation over to others. need to make sure 
	later that obs within horizon are only those after the base episode and not
	those before */
	* rows with baseObserv==0 contribute 0 to the max, so only the base episode's date survives
	egen int baseRelDate = max(actualreldate*baseObserv) if !duplicateGanong, by(uno)
	egen int baseAdmitDate = max(admitdate*baseObserv) if !duplicateGanong, by(uno)

	*recode revocations
	* earliest admission date within each (person, sentence begin date, lead offense) group marks the new conviction
	egen int newConvictionDate = min(admitdate * cond(duplicateGanong,.,1)) if !duplicateGanong, by(uno sentbegindate curoff1) // DR comment: this is a little off because data set has already been truncated at 1Apr1989; but only affects i.curoff1
	gen byte newConviction = admitdate == newConvictionDate

	*recode parole revocations without new convictions by felony, misdemeanor	or technical
	* 1001 = felony reasons (7, 9, 11), 1002 = misdemeanor reasons (6, 8, 10), 1003 = everything else
	replace curoff1 = 1001 if !newConviction & inlist(parrevreason, 7, 9, 11)
	replace curoff1 = 1002 if !newConviction & inlist(parrevreason, 6, 8, 10)
	replace curoff1 = 1003 if !newConviction & !inlist(parrevreason, 7, 9, 11, 6, 8, 10)

	/*educationallevel recodes -- specific to Ganong because they are sample-dependent
	scores 17-21 indicate advanced training after bachelor's,
	30 is some technical school (so 10 years edu), 31 is completed (so 12)
	90 is no educationallevel
	0, 97, 98 are not reported
	99 is a code not discussed in the codebook */
	* unreported codes are replaced by the mean of the raw codes over non-duplicate rows (computed before recoding)
	sum educationallevel if !duplicateGanong, meanonly
	recode educationallevel (17/21 = 16) (0 97/99 = `r(mean)') (90 = 0) (30 = 10) (31 = 12)
	* quintiles of the three WRAT scores
	xtile read = wratreadscore if !duplicateGanong, n(5)
	xtile math = wratmathscore if !duplicateGanong, n(5)
	xtile spel = wratspelscore if !duplicateGanong, n(5)

	* topcoding
	* NOTE(review): Stata treats missing as larger than any number, so these statements also set missing values to the cap -- confirm that is intended
	replace priorincars = 5 if priorincars > 5
	replace priorconvs = 10 if priorconvs > 10
	replace cumdisciptotal = 5 if cumdisciptotal > 5
	replace sentlen = 60 if sentlen > 60

	recode curoff1 drugbehavprob alcbehavprob ageatfirstcontactNum (. = 9999) // make non-missing so their factor variables don't shrink sample
end

* Extended control set added to the "+more controls" regressions in the Ganong tables.
* Mixes continuous terms (educationallevel, read, math, spel, sentlen, offense counts) with factor-variable dummies (i. prefix).
* Several of these variables are created or recoded by PrepGanong (read, math, spel, topcoding, missing-to-9999).
global controlsGanong  educationallevel read math spel i.maritalstatus i.racecode  ///
									i.sexcode i.employmentstatus  ///
									i.cumdisciptotal i.drugbehavprob i.alcbehavprob  ///
									i.mentalhealthhigh i.ageatfirstcontactNum ///
									i.priorincars i.priorconvs i.curoff1 sentlen ///
									i.successfactorfactoraage  ///
									i.successfactord  ///
									i.successfactoreheroinor  ///
									i.successfactorfcommitment  ///
									i.successfactorgfullyempl  ///
									i.successfactorhhadwrat  ///
									violentpersonal nonviolentpersonal property ///
									drugsales drugpossessions alcohols ///
									habitduis sexoff i.ageAtEntry i.admityear

* CleanData, dataset(Ganong) // rerun initial data set construction


* First table
* IV regressions of each recidivism outcome on time served, around the 1983 and 1993 grid revisions.
* Rows are outcomes; columns are revision date x control set; the whole table is built twice,
* without and with ratedate as a continuous time control.
outreg, clear(Results)
foreach timeCtl in "" ratedate { // include continuous time control?
	* instrument is parmonths when there is no time control, the "after" dummy when there is
	local instrument = cond("`timeCtl'"=="", "parmonths", "after")

	foreach depvar in rtp3 rtpFelony3 rtpFelonyConv3 numFelonies10 numFeloniesConv10 {
		outreg, clear

		* only the last outcome row carries the summary statistics footer
		if "`depvar'"=="numFeloniesConv10" local footer `"summstat(idp \ widstat \ N) summtitle("Kleibergen-Paap underid. p" \ "Kleibergen-Paap F" \ "N")"'
		else local footer noautosumm
		global outregcmd outreg, keep(timeserv) se sdec(3) starloc(1) starlevel(10 5 1) coljust(l{c}c) rtitle("`depvar'") `footer' nodisplay merge
		
		foreach RevisionDate in 1May1983 1Apr1993 {
			PrepGanong `RevisionDate' if !duplicateGanong & min(ratedate,actualreldate)>=td(`RevisionDate')-365*4

			* the two count outcomes are built here: admissions after the base admission and within 10 years of the base release
			if inlist("`depvar'", "numFelonies10", "numFeloniesConv10") {
				local marker = cond("`depvar'"=="numFelonies10", "felony", "felonyConv")
				egen `depvar' = total(!baseObserv & admitdate < baseRelDate+365*10 & admitdate > baseAdmitDate & `marker'), by(uno)
			}

			keep if ratedate >= td(`RevisionDate')-366 & ratedate < td(`RevisionDate')+365 & validDates & servdaysprsnjail>=90 & servdaysprsnjail<=10*365 & obsIsFirstratedate

			* first with grid fixed effects only, then adding the extended controls
			foreach extra in "" "$controlsGanong" {
				local colhead = cond(`"`extra'"'=="", "Grid FE", "+more controls")
				ivreg2 `depvar' (timeserv = `instrument') `timeCtl' i.severitylevel#i.sucscore `extra', cluster(cellCluster) small
				$outregcmd ctitle("", "`RevisionDate' revision" \ "", "`colhead'")
			}
		}
		outreg, append replay(Results) store(Results)
	}
}
outreg using "Ganong replication", replace replay(Results) coljust(l{c}c)

* Second table--impacts on 10-year felony return-to-prison counts, by crime
* For each time-control choice and grid revision, the outcome is the per-person count of later admissions
* falling in a given CurrOffCostGroup category within the horizon; one regression (table row) per category.
global L 10 // 10-year time horizon
* row titles, in the same order as the CurrOffCostGroup codes 1-17
global CrimeTypes Homicide Rape AssaultAgg AssaultSimp Robbery Burglary Larceny MVTheft Arson Vandalism Fraud DUI Drug Other RevocWaiv RevocHear RevocNewConv
global outregcmd outreg, keep(timeserv) se starloc(1) starlevel(10 5 1) coljust(l{c}c) /*nodisplay*/ append noautosumm
outreg, clear(Results)
foreach ratedate in "" ratedate { // include continuous time control?
	foreach RevisionDate in 1May1983 1Apr1993 {
		PrepGanong `RevisionDate' if !duplicateGanong & min(ratedate,actualreldate)>=td(`RevisionDate')-365*4
		* later admissions: after the base admission, not the base episode itself, and within the horizon after the base release
		gen byte withinHorizon = admitdate>baseAdmitDate & !baseObserv & admitdate < baseRelDate + $L * 365.25
		* estimation sample; kept as a flag rather than applied with -keep- because the counts below need each person's other rows
		gen byte sample = ratedate >= td(`RevisionDate')-366 & ratedate < td(`RevisionDate')+365 & validDates & servdaysprsnjail >= 90 & servdaysprsnjail <= 10*365 & obsIsFirstratedate
		* grid-cell dummies, created as _I* variables
		xi i.severitylevel*i.sucscore
		foreach controls in "" /*"$controlsGanong"*/ {
			outreg, clear
			foreach c in "1,2,3,4,5,6,7,8,9,10,11,12,13,14,17" 1 2 3 4 5 6 7 8 9 10 11 /*12*/ 13 14 15 16 17 { // First is all convictions. Almost no data for 12=DUI
				cap drop numFelonies10
				egen numFelonies10 = total(inlist(CurrOffCostGroup,`c') & withinHorizon), by(uno)
				* regression errors are captured (and shown) so that one failing category does not stop the loop; its row is then skipped
				cap noi ivreg2 numFelonies10 (timeserv = `=cond("`ratedate'"=="","parmonths","after")') `ratedate' _I* `controls' if sample, cluster(cellCluster) small
				* NOTE(review): for the first (comma-list) element, c is not a single word index, so the row title lookup below may come back empty -- confirm
				if !_rc $outregcmd ctitle("", "`ratedate' " \ "", "`RevisionDate' " \ "", `"`=cond("`controls'"=="","Grid FE","+more controls")'"') rtitle("`:word `c' of $CrimeTypes'")
			}
			outreg, replay(Results) merge store(Results)
		}
	}
}
outreg using "Ganong 10-year by crime", replace replay(Results)


***
*** Ganong review graphs
***

* Load the data for the graphs below: 1993-revision prep, restricted to parole decisions strictly between 1jan1982 and 1apr1997,
* non-duplicate records with valid date ordering and 90 days to 10 years served
PrepGanong 1Apr1993 if ratedate>td(1jan1982) & ratedate<td(1apr1997) & !duplicateGanong & validDates & servdaysprsnjail>=90 & servdaysprsnjail<=10*365

* rise in age at release after emergency release program begins in first half of 1989 (cited in Roodman text):
twoway lpoly ageatrelease actualreldate if actualreldate>=td(1jan1987) & actualreldate<=td(31dec1991), xlabel(,format(%td)) bw(10) name(ageatrelease, replace)
* releases by month under Governor's Emergency Release program:
* (collapses to one row per release date among releasecode==8 records; preserve/restore keeps the full data for the later graphs)
preserve
collapse (count) N=uno if releasecode==8, by(actualreldate)
line N actualreldate, xlabel(,format(%td)) name(GovEmerRel, replace)
restore

* time served & recidivism over time
* Local-polynomial fits are estimated separately within each grid regime (before 1may1983, 1may1983 up to 1apr1993,
* from 1apr1993 on) so the curves may break at the revision dates. Solid lines with CI: 3-year return-to-prison (left axis);
* dashed lines: months served (right axis). Each lpolyci contributes two plots, so the first lpoly is plot 7 in the legend order.
gen servmonthsprsnjail = servdaysprsnjail/365.25*12
twoway lpolyci rtp3 ratedate if ratedate<td(1may1983)                                     , bw(100) clcolor(black) || ///
       lpolyci rtp3 ratedate if ratedate>=td(1may1983) & ratedate<td(1Apr1993)            , bw(100) clcolor(black) || ///
			 lpolyci rtp3 ratedate if ratedate>=td(1Apr1993)                                    , bw(100) clcolor(black) || ///
			 lpoly servmonthsprsnjail ratedate if ratedate<td(1may1983)                         , bw(100) lcolor(black) lpat(dash) yaxis(2) || ///
			 lpoly servmonthsprsnjail ratedate if ratedate>=td(1may1983) & ratedate<td(1Apr1993), bw(100) lcolor(black) lpat(dash) yaxis(2) || ///
			 lpoly servmonthsprsnjail ratedate if ratedate>=td(1Apr1993)                        , bw(100) lcolor(black) lpat(dash) yaxis(2) ||  ///
			 if  servdaysprsnjail>=90 & servdaysprsnjail<=10*365, ///
			 tline(1May1983 1Apr1993) ///
			 legend(order(1 7) label(1 "Return-to-prison rate, 95% CI (left axis)") label(7 "Months served (right axis)") region(style(none)) margin(zero) size(small)) ///
			 xtitle("Date of parole decision",  size(small)) ///
			 ytitle("Return-to-prison rate"  ,  size(small)) ///
			 ytitle("Months served", axis(2) orient(rvertical) size(small)) ///
			 xlabel(`=td(1jan1982)'(`=2*365.25')`=td(1jan1996)', format(%tdMon-YY) labsize(small)) ///
			 ylabel(.25 "25%" .3 "30%" .35 "35%" .4 "40%", angle(hor) labsize(small)) ///
			 ylabel(20(5)50, angle(hor) axis(2) labsize(small)) ///
			 scheme(s1color) name(rtp, replace)
graph export "Ganong rtp vs. time served.png", replace width(666)

* Reconviction counterpart of the return-to-prison graph: 3-year reconviction rate (reconv3, solid with CI, left axis)
* against months served (dashed, right axis), fitted separately within each grid regime.
* Each lpolyci contributes two plots, so the first lpoly is plot 7 in the legend order.
* Fix: the legend entry for plot 1 used to read "Return-to-prison rate", copied from the previous graph; it now matches the plotted series and the y-axis title.
twoway lpolyci reconv3 ratedate if ratedate<td(1may1983)                                  , bw(100) clcolor(black) || ///
       lpolyci reconv3 ratedate if ratedate>=td(1may1983) & ratedate<td(1Apr1993)         , bw(100) clcolor(black) || ///
			 lpolyci reconv3 ratedate if ratedate>=td(1Apr1993)                                 , bw(100) clcolor(black) || ///
			 lpoly servmonthsprsnjail ratedate if ratedate<td(1may1983)                         , bw(100) lcolor(black) lpat(dash) yaxis(2) || ///
			 lpoly servmonthsprsnjail ratedate if ratedate>=td(1may1983) & ratedate<td(1Apr1993), bw(100) lcolor(black) lpat(dash) yaxis(2) || ///
			 lpoly servmonthsprsnjail ratedate if ratedate>=td(1Apr1993)                        , bw(100) lcolor(black) lpat(dash) yaxis(2) ,  ///
			 tline(1May1983 1Apr1993) ///
			 legend(order(1 7) label(1 "Reconviction rate, 95% CI (left axis)") label(7 "Months served (right axis)") region(style(none)) margin(zero) size(small)) ///
			 xtitle("Date of parole decision",  size(small)) ///
			 ytitle("Reconviction rate",  size(small)) ///
			 ytitle("Months served", axis(2) orient(rvertical) size(small)) ///
			 xlabel(`=td(1jan1982)'(`=2*365.25')`=td(1jan1996)', format(%tdMon-YY) labsize(small)) ///
			 ylabel(.20 "20%" .25 "25%" .3 "30%" .35 "35%", angle(hor) labsize(small)) ///
			 ylabel(20(5)50, angle(hor) axis(2) labsize(small)) ///
			 scheme(s1color) name(reconv, replace)
graph export "Ganong reconv vs. time served.png", replace width(666)


***
*** Kuziemko replication
***

* PrepKuziemko, dataset(name)
*   Loads "GA cleaned <name>" and derives the variables used in the Kuziemko replication: the original
*   definitions first, then the Roodman-preferred (DR) variants. timeserv is converted from years to months.
*   Finally keeps new convictions with sentence length above 6 and below 121 months. Data are left in memory.
cap program drop PrepKuziemko
program define PrepKuziemko
	syntax, dataset(string)
	use "GA cleaned `dataset'", clear

	* --- original definitions ---
	recode racecode (9 = .)
	gen byte drugcrime = inrange(curoff1, 4000, 4136)
	gen byte violcrime = 3.currvioflag
	gen byte propcrime = !(inlist(majoroffensegroup, ., 3, 4, 6, 11, 12, 14, 15, 16, 17) | inlist(majoroffensegroup, 18, 19, 20, 33, 34, 35, 36)) // embodies bug in definition
	gen byte burglary = curoff1==1601
	gen byte hispanic = ethnicity==1
	gen sentlenRound = round(sentLenMos, 12)
	replace timeserv = timeserv * 12 // years to months; everything below uses months
	gen timerec = timeserv + mofd(oldtentreldate)-mofd(actualreldate) // time recommended, mass release
	gen admitmonth = month(admitdate)
	gen actualrelyear  =  year(actualreldate)
	gen sentbeginyear = year(sentbegindate)
	gen crimeyear = year(crimedate)
	gen byte after97 = crimeyear > 1997 if crimeyear<2007 // 90% policy adoption
	gen clust = parcrimeseverity1 * 10000 + sucscore //  parcrimeseverity1 is used, but not quite same as grid severity

	* pct90crime: 1 if any of the ten current offenses is on the fixed code list, or is 1123 or 1601 and
	* matches one of the three parole crime codes recorded with severity 5 and suffix 1
	local listedCodes 1102,1103,1121,1190,1302,1305,1314,1315,1321,1901,1903,1904,1905,1911,2006,2018,2019,2020,2091,2751,2801
	gen byte pct90crime = 0
	forvalues k = 1/10 {
		replace pct90crime = 1 if inlist(curoff`k', `listedCodes') | ///
			(inlist(curoff`k', 1123, 1601) & ( ///
				(parcrimecode1==curoff`k' & parcrimeseverity1==5 & parcrimesuffix1==1) | ///
				(parcrimecode2==curoff`k' & parcrimeseverity2==5 & parcrimesuffix2==1) | ///
				(parcrimecode3==curoff`k' & parcrimeseverity3==5 & parcrimesuffix3==1)))
	}

	* Roodman-preferred definitions
	gen byte drugcrimeDR = inlist(majoroffensegroup, 33, 34, 35, 36)
	gen byte propcrimeDR = inlist(majoroffensegroup, 11, 12, 14, 15, 16) // definitions actually taken from Kuziemko's revised code
	gen byte violcrimeDR = inlist(majoroffensegroup, 3, 4, 6, 17, 18, 19, 20)
	recode majoroff (5 7 = 4) (2 6 8 = 9) // group into 1=violent, 3=property, 4=drug, 9=other
	gen clustDR = sucscore*10 + severitylevel
	gen sentyear = year(sentdate)
	gen timerecDR = timeserv + (oldtentreldate - actualreldate)/365.25*12 // difference in day-resolution rather than month-resolution dates
	gen byte after97DR = sentyear > 1997 // 90% policy applied to sentences decided, not crimes committed, as of 1 January 1998. http://j.mp/29EplKh

	* sample restriction common to all Kuziemko analyses
	keep if sentLenMos>6 & sentLenMos<121 & admnewprobpar==1
end

* Sample-selection conditions and the common control list, stored as globals so they can be
* spliced into -if- qualifiers and varlists below. Each sample global holds a bare boolean
* expression joined with &; it is wrapped in parentheses wherever it is combined with | later on.
* The "DR" variants are the revised definitions: they use sentdate and severitylevel where the
* originals use sentbegindate and parcrimeseverity1.

* generalSample: age at admission over 17, sb441flag other than 3, success score 1-19, active-inmate flag 1,
* parconsidtype 1, positive parcrimeseverity1, admitted on/after 1 Jan 1993 and released before 1 Jan 2008
global generalSample ageatadmission>17 & sb441flag!=3 & sucscore>0 & sucscore<20 & activeinmateflag==1 & ///
	parconsidtype==1 & parcrimeseverity1>0 & admitdate>=td(1jan1993) & actualreldate<td(1jan2008)

* gridSample: admission and sentence start both in 1995-2005, release before 1 Jan 2006, not a 90% crime,
* parcrimeseverity1 in 1-4, success score 4-13
global gridSample   admitdate>=td(1jan1995) & admitdate<td(1jan2006) & sentbegindate>=td(1jan1995) & sentbegindate<td(1jan2006) &  ///
	parconsidtype==1 & timeserv>0 & totalmostoserve>0 & parcrimeseverity1>0 & !pct90crime & sb441flag!=3 & parcrimeseverity1<5 & ///
	sucscore>3 & sucscore<14 & actualreldate<td(1jan2006)
* gridSampleDR: revised grid sample; only upper date bounds, no success-score window (those are applied
* per regression), severitylevel<5 in place of the parcrimeseverity1 bounds, plus ageatadmission>17
global gridSampleDR admitdate<td(1jan2006) &        sentdate<td(1jan2006) & ///
	parconsidtype==1 & timeserv>0 & totalmostoserve>0 &                       !pct90crime & sb441flag!=3 &     severitylevel<5 & ///
	                           actualreldate<td(1jan2006) & ageatadmission>17

* massReleaseSample: releases dated 16 Mar 1981 with releasecode 40, sentence under 73 months
global massReleaseSample activeinmateflag==1 & sentLenMos<73 & timeserv>0 & actualreldate==td(16mar1981) & releasecode==40

* pct90Sample: sentence start in 1993-2001, success score 1-20, parcrimeseverity1<8, age at admission over 17
global pct90Sample   sentbegindate>=td(1jan1993) & sentbegindate<td(1jan2002) & parconsidtype==1 & timeserv>0 & totalmostoserve>0 & sb441flag!=3 & ///
	sucscore>0 & sucscore<21 & parcrimeseverity1<8 & ageatadmission>17
* pct90SampleDR: same, but dated by sentdate, using severitylevel, and additionally requiring sentlen<=5
global pct90SampleDR      sentdate>=td(1jan1993) & sentdate<td(1jan2002)      & parconsidtype==1 & timeserv>0 & totalmostoserve>0 & sb441flag!=3 & ///
	sucscore>0 & sucscore<21 &     severitylevel<8 & ageatadmission>17 & sentlen<=5

* controlsKuziemko: covariates shared by the replication regressions (level-2 indicators of racecode and sexcode,
* age at admission, number of prior incarcerations)
global controlsKuziemko 2.racecode 2.sexcode ageatadmission numpriorincars


*** replication of original tables

* CleanData, dataset(Kuziemko)
PrepKuziemko, dataset(Kuziemko)

* Table I: means of each variable (rows) within each analysis sample (columns),
* followed by a final row of observation counts.
* The five column samples are listed once and reused by both loops.
local tableISamples `" "$generalSample" "$gridSample" "$massReleaseSample" "$pct90Sample & !pct90crime" "$pct90Sample & pct90crime" "'

frmttable, clear(TableI)
foreach rowvar in rtphit3 timeserv $controlsKuziemko {
	// build one row in the unnamed working table, one merged cell per sample ...
	frmttable, clear
	foreach smp of local tableISamples {
		summarize `rowvar' if `smp', meanonly
		matrix t = r(mean)
		frmttable, statmat(t) rtitle("`rowvar'") merge nodisplay sdec(3)
	}
	// ... then stack that row under the accumulated TableI
	frmttable, replay(TableI) append store(TableI) nodisplay
}

* observation-count row, built the same way and appended last (this final call displays the table)
frmttable, clear
quietly foreach smp of local tableISamples {
	count if `smp'
	matrix t = r(N)
	frmttable, statmat(t) rtitle("Observations") merge nodisplay sdec(0)
}
frmttable, replay(TableI) append store(TableI)

* Table II: grid-based design. Column (1) is OLS, (2) the first stage, (3)-(6) 2SLS variants.
* Shared pieces are collected in locals; regressor order is the same as when written out in full.
local t2base $controlsKuziemko drugcrime violcrime propcrime i.sentlenRound i.actualrelyear
local t2cell i.parcrimeseverity1 i.sucscore
local t2fmt  se square sdec(4) starloc(1) starlevel(10 5 1) summstat(N)

* (1) OLS of three-year return on time served
regress rtphit3 timeserv `t2base' if $gridSample, cluster(clust)
outreg, keep(timeserv $controlsKuziemko) `t2fmt' ctitle("", "(1)") nodisplay

* (2) first stage: time served on parmonths, restricted to observations with non-missing outcome
regress timeserv parmonths `t2base' `t2cell' if $gridSample & rtphit3<., cluster(clust)
outreg, keep($controlsKuziemko parmonths) `t2fmt' ctitle("", "(2)") merge nodisplay

* (3) 2SLS with severity and success-score dummies
ivregress 2sls rtphit3 (timeserv = parmonths) `t2base' `t2cell' if $gridSample, cluster(clust) small
outreg, keep(timeserv $controlsKuziemko) `t2fmt' ctitle("", "(3)") merge nodisplay

* (4) as (3), adding severity-specific linear terms in the success score
ivregress 2sls rtphit3 (timeserv = parmonths) `t2base' i.parcrimeseverity1##c.sucscore i.sucscore if $gridSample, cluster(clust) small
outreg, keep(timeserv $controlsKuziemko) `t2fmt' ctitle("", "(4)") merge nodisplay

* (5) as (3), with rtphitWt3 as the outcome
ivregress 2sls rtphitWt3 (timeserv = parmonths) `t2base' `t2cell' if $gridSample, cluster(clust) small
outreg, keep(timeserv $controlsKuziemko) `t2fmt' ctitle("", "(5)") merge nodisplay

* (6) as (3), restricted to success scores 8 and 9; this last call displays the merged table
ivregress 2sls rtphit3 (timeserv = parmonths) `t2base' `t2cell' if $gridSample & inlist(sucscore, 8, 9), cluster(clust) small
outreg, keep(timeserv $controlsKuziemko) `t2fmt' ctitle("", "(6)") merge

* Table III: mass-release design. Probit marginal effects at means, plus one linear regression in column (3).
PrepKuziemko, dataset(Ganong) // latest, Kuziemko-provided dataset has oldreldate zeroed out, so use older Ganong data

local t3x   timerec timeserv
local t3fmt se square sdec(4) starloc(1) starlevel(10 5 1) summstat(N)

* (1) baseline
probit rtphit3 `t3x' if $massReleaseSample
margins, dydx(`t3x') atmeans post
outreg, keep(`t3x') `t3fmt' ctitle("", "(1)") nodisplay

* (2) adds admission-month dummies
probit rtphit3 `t3x' i.admitmonth if $massReleaseSample
margins, dydx(`t3x') atmeans post
outreg, keep(`t3x') `t3fmt' ctitle("", "(2)") nodisplay merge

* (3) linear regression with rtphitWt3 as the outcome
regress rtphitWt3 `t3x' if $massReleaseSample
outreg, keep(`t3x') `t3fmt' ctitle("", "(3)") nodisplay merge

* (4) adds sentence length in months
probit rtphit3 `t3x' sentLenMos if $massReleaseSample
margins, dydx(`t3x' sentLenMos) atmeans post
outreg, keep(`t3x' sentLenMos) `t3fmt' ctitle("", "(4)") nodisplay merge

* (5) 36-month sentences only
probit rtphit3 `t3x' if $massReleaseSample & sentLenMos==36
margins, dydx(`t3x') atmeans post
outreg, keep(`t3x') `t3fmt' ctitle("", "(5)") nodisplay merge

* (6) observations flagged by the burglary indicator only
probit rtphit3 `t3x' if $massReleaseSample & burglary
margins, dydx(`t3x') atmeans post
outreg, keep(`t3x') `t3fmt' ctitle("", "(6)") nodisplay merge

* (7) adds the standard controls; this last call displays the merged table
probit rtphit3 `t3x' $controlsKuziemko if $massReleaseSample
margins, dydx(`t3x' $controlsKuziemko) atmeans post
outreg, keep(`t3x' $controlsKuziemko) `t3fmt' ctitle("", "(7)") merge

* Table IV: 90% rule design. Difference-in-differences probits of three-year return on
* pct90crime, its interaction with after97, and sentence-start-year dummies, clustered on curoff1.
* Columns are merged into one outreg table; only the last call (column 5) displays it.
PrepKuziemko, dataset(Kuziemko)

tab curoff1 if ratedate<=td(30jun2002) & sentdate>=td(1jan1998) & pct90crime // compare: pap.georgia.gov/sites/pap.georgia.gov/files/Annual_Reports/2002_Annual_Report0001.pdf#page=14

* interaction term, built as an explicit variable; dropped first so the section can be re-run
cap drop pct90crime_after97
gen byte pct90crime_after97 = pct90crime * after97

* (1) no controls
* NOTE(review): this is the only -margins- call in the table without -atmeans-, so it reports average
* marginal effects rather than effects at the means -- confirm that is intended
probit rtphit3 i.pct90crime_after97 i.pct90crime i.sentbeginyear                                                                                         if $pct90Sample & sentLenMos<=60             , cluster(curoff1)
margins, dydx(pct90crime_after97 pct90crime) post
outreg, keep(1.pct90crime_after97 1.pct90crime)                            se square sdec(4) starloc(1) starlevels(10 5 1) summstat(N) ctitle("", "(1)") nodisplay

* (2) adds controls, sentence-length terms, time served and severity dummies
probit rtphit3 i.pct90crime_after97 i.pct90crime i.sentbeginyear $controlsKuziemko hispanic i.sentlenRound sentLenMos timeserv i.parcrimeseverity1 if $pct90Sample & sentLenMos<=60, cluster(curoff1)
margins, dydx(pct90crime_after97 pct90crime $controlsKuziemko timeserv) atmeans post
outreg, keep(1.pct90crime_after97 1.pct90crime $controlsKuziemko timeserv) se square sdec(4) starloc(1) starlevels(10 5 1) summstat(N) ctitle("", "(2)") nodisplay merge
                                    
* (3) as (2), keeping non-90% crimes only when the sentence is at least 48 months
probit rtphit3 i.pct90crime_after97 i.pct90crime i.sentbeginyear $controlsKuziemko hispanic i.sentlenRound sentLenMos timeserv i.parcrimeseverity1 if $pct90Sample & sentLenMos<=60 & (sentLenMos>=48 | pct90crime), cluster(curoff1)
margins, dydx(pct90crime_after97 pct90crime $controlsKuziemko timeserv) atmeans post
outreg, keep(1.pct90crime_after97 1.pct90crime $controlsKuziemko timeserv) se square sdec(4) starloc(1) starlevels(10 5 1) summstat(N) ctitle("", "(3)") nodisplay merge

* (4) as (2), as a linear regression with rtphitWt3 as the outcome
regress rtphitWt3 i.pct90crime_after97 i.pct90crime i.sentbeginyear $controlsKuziemko hispanic i.sentlenRound sentLenMos timeserv i.parcrimeseverity1 if $pct90Sample & sentLenMos<=60, cluster(curoff1)
outreg, keep(1.pct90crime_after97 1.pct90crime $controlsKuziemko timeserv) se square sdec(4) starloc(1) starlevels(10 5 1) summstat(N) ctitle("", "(4)") nodisplay merge
                                    
* (5) as (2), adding a linear trend in sentence-start year (years since 1990) specific to 90% crimes
cap drop pct90crime_sentdate
gen pct90crime_sentdate = pct90crime * (year(sentbegindate) - 1990)
probit rtphit3 i.pct90crime_after97 i.pct90crime pct90crime_sentdate i.sentbeginyear $controlsKuziemko hispanic i.sentlenRound sentLenMos timeserv i.parcrimeseverity1 if $pct90Sample & sentLenMos<=60, cluster(curoff1)
margins, dydx(pct90crime_after97 pct90crime $controlsKuziemko timeserv) atmeans post
outreg, keep(1.pct90crime_after97 1.pct90crime $controlsKuziemko timeserv) se square sdec(4) starloc(1) starlevel(10 5 1) summstat(N) ctitle("", "(5)") merge


***
*** Figures & tables in review of Kuziemko
***

*** Grid-based design

* Graphs: mean and median months served by parole success score, one figure per grid era.
* Vertical lines are drawn at the half-integer score values given in xline(); the text() labels name the bands between them.
PrepKuziemko, dataset(Kuziemko)
keep if $gridSampleDR

set scheme s1color

* options common to both figures
local figAxes   ytitle("Months served") xtitle("Parole success score") xlabel(0/20) ylabel(10(5)35) yscale(range(9.6 35))
local figLook   lcolor(blue red) mcolor(blue red) graphregion(margin(none))
local figLegend legend(lab(1 "Mean") lab(2 "Median") region(lwidth(none)) bmargin(b=0) ring(0) bplace(ne) cols(1))

* cases rated from 1 May 1983 up to (not including) 1 Apr 1993
preserve
collapse (mean) mean=timeserv (median) median=timeserv ///
	if severitylevel<=4 & ratedate>=td(1may1983) & ratedate<td(1apr1993) & actualreldate<td(1jan2006), by(sucscore)
twoway connected mean median sucscore, `figLook' `figAxes' `figLegend' ///
	xline(5.5 8.5 10.5 12.5, lcolor(black)) name(TimeVSPoints1983, replace) ///
	text(9.5 2.75 "Poor") text(9.5 7 "Fair") text(9.5 9.5 "Avg") text(9.5 11.5 "Good") text(9.5 16.5 "Excellent")
graph export "Kuziemko time vs points, 1983 grid.png", width(666) replace
restore

* cases sentenced in 1995-2005 and admitted from 1 Jan 1995
preserve
collapse (mean) mean=timeserv (median) median=timeserv ///
	if severitylevel<=4 & sentdate>=td(1jan1995) & sentdate<td(1jan2006) & admitdate>=td(1jan1995) & actualreldate<td(1jan2006), by(sucscore)
twoway connected mean median sucscore, `figLook' `figAxes' `figLegend' ///
	xline(8.5 13.5, lcolor(black)) name(TimeVSPoints1993, replace) ///
	text(9.5 4.25 "Poor") text(9.5 11 "Average") text(9.5 17 "Excellent")
graph export "Kuziemko time vs points, 1993 grid.png", width(666) replace
restore

* Tables: grid-based design, replication vs. revised specification, then threshold-by-threshold
* IV estimates for three outcomes, each accompanied by a likelihood plot.
PrepKuziemko, dataset(Kuziemko)
* keep the union of the original and revised grid samples, with non-missing outcome
keep if rtphit3<. & (($gridSample) | ($gridSampleDR))
* rating/sentencing-date windows used to split the revised sample by grid era
global subsample83 ratedate>=td(1may1983) & ratedate<td(1apr1993)
global subsample93 admitdate>=td(1jan1995) & sentdate>=td(1jan1995) & sentdate<td(1jan2006)

* first output table: original specification (instrument parmonths) next to the revised one (instrument i.sucgroup93)
ivreg2 rtphit3 (timeserv = parmonths   ) $controlsKuziemko i.sentlenRound i.actualrelyear i.parcrimeseverity1 i.sucscore drugcrime violcrime propcrime if $gridSample, cluster(clust) small
outreg, keep(timeserv $controlsKuziemko) se bdec(4) starloc(1) starlevel(10 5 1) summstat(idp \ widstat \ N) ctitle("", "Replication") nodisplay
ivreg2 rtphit3 (timeserv = i.sucgroup93) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & $subsample93 & sucscore>=4  & sucscore<=13, cluster(clustDR) small partial(i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR)
outreg using "Kuziemko grid replication", replace keep(timeserv $controlsKuziemko) se bdec(4) starloc(1) starlevel(10 5 1) summstat(idp \ widstat \ N) ctitle("", "Revised") coljust(l{c}c) merge

set scheme s1color
outreg, clear(Results)
* grid over which $weakivcmd evaluates each estimate: $gridpoints values from $gridmin to $gridmax
global gridmin    -.05
global gridmax     .05
global gridpoints 1000
* one pass per outcome: six threshold-specific regressions merged into one table row-block,
* which is then appended to the stored table "Results"
foreach depvar in rtphit rtpFelony rtpFelonyConv {
	outreg, clear
	// Mata matrix with $gridpoints rows and no columns; one column is appended after each regression below
	mata plot = J($gridpoints,0,0)
	// summary statistics are attached only on the last outcome, so they appear once at the bottom of the stacked table
	global outregcmd outreg, keep(timeserv) se sdec(4) starloc(1) starlevel(10 5 1) `=cond("`depvar'"=="rtpFelonyConv","summstat(idp \ widstat \ N)","noautosumm")' merge nodisplay
	// NOTE(review): -artest- is not defined in this part of the file; it is expected to leave a matrix r(plot)
	// whose column 1 is the grid and column 2 the plotted statistic -- confirm against its definition
	global weakivcmd artest, gridmin($gridmin) gridmax($gridmax) gridpoints($gridpoints) ptype(lower) nograph

	// 1983-era grid: instrument is i.sucgroup79, each regression uses a window of success scores
	ivreg2 `depvar'3 (timeserv = i.sucgroup79) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & $subsample83 & sucscore>=1  & sucscore<=8 , cluster(clustDR) small
	$outregcmd ctitle("", "1983, 5 to 6")
	$weakivcmd graphname(`depvar'83_56, replace)
	mata plot = plot, st_matrix("r(plot)")[,2]

	ivreg2 `depvar'3 (timeserv = i.sucgroup79) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & $subsample83 & sucscore>=6  & sucscore<=10, cluster(clustDR) small
	$outregcmd ctitle("", "1983, 8 to 9")
	$weakivcmd graphname(`depvar'83_89, replace)
	mata plot = plot, st_matrix("r(plot)")[,2]

	ivreg2 `depvar'3 (timeserv = i.sucgroup79) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & $subsample83 & sucscore>=9  & sucscore<=12, cluster(clustDR) small
	$outregcmd ctitle("", "1983, 10 to 11")
	$weakivcmd graphname(`depvar'83_1011, replace)
	mata plot = plot, st_matrix("r(plot)")[,2]

	ivreg2 `depvar'3 (timeserv = i.sucgroup79) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & $subsample83 & sucscore>=11 & sucscore<=17, cluster(clustDR) small
	$outregcmd ctitle("", "1983, 12 to 13")
	$weakivcmd graphname(`depvar'83_1213, replace)
	mata plot = plot, st_matrix("r(plot)")[,2]

	// 1993-era grid: instrument is i.sucgroup93
	ivreg2 `depvar'3 (timeserv = i.sucgroup93) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & $subsample93 & sucscore>=4  & sucscore<=13, cluster(clustDR) small
	$outregcmd ctitle("", "1993, 8 to 9")
	$weakivcmd graphname(`depvar'93_89, replace)
	mata plot = plot, st_matrix("r(plot)")[,2]

	ivreg2 `depvar'3 (timeserv = i.sucgroup93) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel c.sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & $subsample93 & sucscore>=9  & sucscore<=18, cluster(clustDR) small
	$outregcmd ctitle("", "1993, 13 to 14")
	$weakivcmd graphname(`depvar'93_1314, replace)
	mata plot = plot, st_matrix("r(plot)")[,2]

	outreg, append replay(Results) store(Results) nodisplay

	// The next line reads r(plot) again after -outreg- has run, so it relies on r(plot) from the last
	// $weakivcmd call still being available at this point.
	mata X = st_matrix("r(plot)")[|2,1 \ $gridpoints-1,1|] // X coordinates for likelihood functions
	// difference between rows i+1 and i-1 of each column of plot, divided by the width of two grid steps
	mata likelihood = abs(plot[|3,.\.,.|] - plot[|.,. \ $gridpoints-2,.|]) / (X[3] - X[1]) // centered derivative/density estimates
	// row-wise product of the six columns (via sum of logs), rescaled so it sums to 1 when multiplied by the grid step
	mata Bayes = exp(rowsum(ln(likelihood))); Bayes = Bayes / sum(Bayes) / (X[2] - X[1]) // Bayes-combined likelihood
	mata likelihood = likelihood, Bayes, X

	// copy the columns into variables Plot`depvar'1, Plot`depvar'2, ...; the last one (X) serves as the x axis below.
	// -force- permits a row count different from the number of observations
	getmata (Plot`depvar'*) = likelihood, force replace
	twoway line Plot`depvar'?, name(`depvar', replace) legend(off) xtitle("Change in three-year return-to-prison rate per month extra time in prison") ytitle("Likelihood") ylabel(, angle(hor)) xlabel(-.05(.01).05)
	graph export "Kuziemko `depvar' AR.png", replace width(1000)
}
* write the stacked per-outcome table into the same file as the replication/revised table written above
outreg using "Kuziemko grid replication", replace coljust(l{c}c) replay(Results) addtable

*** Kuziemko replication: mass release
* Probit marginal effects (at means) of recommended, served and commuted time on re-offending,
* for the mass-release sample only, plus two follow-up tests cited in the text.

PrepKuziemko, dataset(Ganong)
keep if $massReleaseSample
* time commuted = months recommended minus months actually served
gen timecomm = timerec - timeserv

scatter timeserv timerec, xtitle("Months recommended by parole board") ytitle("Months actually served")
graph export "Kuziemko mass release scatter.png", replace width(666)

* documentation that no mass releasees returned for parole violations, cited in footnote
tab futureadmcode if rtp3

* one pass per outcome; the five columns of each pass are merged, then appended to the stored table "Results"
outreg, clear(Results)
foreach depvar in rtphit3 reconv3 {
	outreg, clear
	// summary statistic N is attached only on the last outcome, so it appears once at the bottom of the stacked table
	global outregcmd outreg, se sdec(4) starloc(1) starlevels(10 5 1) `=cond("`depvar'"=="reconv3","summstat(N)","noautosumm")' nodisplay merge
	
	// the first three columns use each pair out of timerec, timeserv, timecomm (timecomm = timerec - timeserv,
	// so all three cannot enter together)
	probit `depvar' timerec timeserv
	margins, dydx(*) post atmeans
	$outregcmd ctitle("", "Replication")

	probit `depvar' timerec timecomm
	margins, dydx(*) post atmeans
	$outregcmd ctitle("", "Replication")

	probit `depvar' timecomm timeserv
	margins, dydx(*) post atmeans
	$outregcmd ctitle("", "Replication")

	// split by prior incarcerations
	probit `depvar' c.timecomm timeserv if numpriorincars==0
	margins, dydx(*) post atmeans
	$outregcmd ctitle("", "First-timers")

	// NOTE(review): in Stata a missing numpriorincars also satisfies >0 -- confirm the variable has no
	// missing values in this sample (same applies to -priors- below)
	probit `depvar' c.timecomm timeserv if numpriorincars>0
	margins, dydx(*) post atmeans
	$outregcmd ctitle("", "Returnees")
	
	outreg, append replay(Results) store(Results) nodisplay
}
outreg using "Kuziemko mass release replication", replace replay(Results) coljust(l{c}c)

* Test of diminishing returns to time commuted, mentioned in footnote
probit rtphit3 timerec timecomm // first, just the regular regression, for comparison
* marginal effect of timecomm evaluated at each integer value 0..16
margins, dydx(timecomm) at(timecomm=(0/16))
marginsplot, name(linear, replace) ylabel(-.1(.1).2)
probit rtphit3 timerec c.timecomm##c.timecomm // add quadratic term
margins, dydx(timecomm) at(timecomm=(0/16))
marginsplot, name(quadratic, replace)


* formal test of equality of impacts of timecomm for those with and without priors, reported in text
gen byte priors = numpriorincars>0
* fully interacted model; the test is on the priors x timecomm interaction coefficient
probit rtphit3 priors##(c.timecomm c.timeserv)
test 1.priors#c.timecomm

*** Kuziemko replication: 90% rule
*
* For each of three recidivism outcomes, runs four pairs of probits and reports marginal effects at means.
* Within each pair:
*   "Replication"          uses $pct90Sample & sentLenMos<=60, i.sentbeginyear, parcrimeseverity1, after97
*   "Revised replication"  uses $pct90SampleDR,                i.sentyear,      severitylevel,     after97DR
* Pairs: (a) no controls; (b) with controls; (c) with controls, keeping non-90% crimes only when
* sentLenMos>=48; (d) with controls plus a 90%-crime-specific linear trend in the sentencing date.
* Columns are merged per outcome and the per-outcome tables are stacked in the stored table "Results".

PrepKuziemko, dataset(Kuziemko)
keep if ($pct90Sample) | ($pct90SampleDR)

* Placeholders, refilled before each regression so that the same variable names (and therefore the
* same table rows) are used in the original and the revised columns.
gen pct90crime_sentdate = .
gen pct90crime_after97 = .

* right-hand-side blocks shared by the regressions with controls (original vs. revised severity variable)
local rhsOrig $controlsKuziemko timeserv i.parcrimeseverity1 i.sentlenRound sentLenMos hispanic
local rhsDR   $controlsKuziemko timeserv i.severitylevel     i.sentlenRound sentLenMos hispanic

outreg, clear(Results)
foreach depvar in rtphit rtpFelony rtpFelonyConv {
	outreg, clear

	// NOTE(review): summstat(N) is attached on the FIRST outcome here, whereas the grid and mass-release
	// sections attach it on the last outcome so that it lands at the bottom of the stacked table -- confirm intended
	global outregcmd outreg, se sdec(4) starloc(1) starlevels(10 5 1) `=cond("`depvar'"=="rtphit","summstat(N)","noautosumm")' nodisplay merge

	// (a) no controls
	replace pct90crime_after97 = pct90crime * after97
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentbeginyear if $pct90Sample & sentLenMos<=60, cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime) atmeans post
	$outregcmd ctitle("", "Replication")

	replace pct90crime_after97 = pct90crime * after97DR
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentyear if $pct90SampleDR, cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime) atmeans post
	$outregcmd ctitle("", "Revised replication")

	// (b) with controls
	replace pct90crime_after97 = pct90crime * after97
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentbeginyear `rhsOrig' if $pct90Sample & sentLenMos<=60, cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime timeserv) atmeans post
	$outregcmd ctitle("", "Replication")

	replace pct90crime_after97 = pct90crime * after97DR
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentyear `rhsDR' if $pct90SampleDR, cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime timeserv) atmeans post
	$outregcmd ctitle("", "Revised replication")

	// (c) with controls; non-90% crimes are kept only when the sentence is at least 48 months
	replace pct90crime_after97 = pct90crime * after97
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentbeginyear `rhsOrig' if $pct90Sample & sentLenMos<=60 & (sentLenMos>=48 | pct90crime), cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime timeserv) atmeans post
	$outregcmd ctitle("", "Replication")

	replace pct90crime_after97 = pct90crime * after97DR
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentyear `rhsDR' if $pct90SampleDR & (sentLenMos>=48 | pct90crime), cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime timeserv) atmeans post
	$outregcmd ctitle("", "Revised replication")

	// (d) with controls plus a linear date trend for 90% crimes.
	// The trend is the product of the indicator and the date. It must be written with *, because the
	// factor-variable operator # is only understood in varlists, not in -generate-/-replace- expressions.
	replace pct90crime_after97 = pct90crime * after97
	replace pct90crime_sentdate = pct90crime * sentbegindate
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentbeginyear `rhsOrig' pct90crime_sentdate if $pct90Sample & sentLenMos<=60, cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime timeserv) atmeans post
	$outregcmd ctitle("", "Replication")

	replace pct90crime_after97 = pct90crime * after97DR
	replace pct90crime_sentdate = pct90crime * sentdate
	probit `depvar'3 i.pct90crime_after97 i.pct90crime i.sentyear `rhsDR' pct90crime_sentdate if $pct90SampleDR, cluster(curoff1)
	margins, dydx(pct90crime_after97 pct90crime timeserv) atmeans post
	$outregcmd ctitle("", "Revised replication")

	// pct90crime_after97 is left holding pct90crime * after97DR when the loop ends

	outreg, append replay(Results) store(Results) nodisplay
}
outreg using "Kuziemko 90% replication", replace replay(Results) coljust(l{c}c)

* Fuzzy RDD regressions reported in text
* Same specification for each outcome: time served instrumented by after97DR, linear control for sentdate,
* 90% crimes sentenced within one year (365.25 days) either side of 1 Jan 1998.
foreach outcome in rtphit3 rtpFelony3 rtpFelonyConv3 {
	ivregress 2sls `outcome' (timeserv = after97DR) sentdate $controlsKuziemko i.severitylevel hispanic ///
		if $pct90SampleDR & sentLenMos<=60 & abs(sentdate-td(1jan1998))<365.25 & pct90crime, ///
		cluster(curoff1) small
	weakiv, graph(ar)
}

* basis for estimate in text of 366 wrongly classified subjects
* (the regression is run only to define e(sample); it uses pct90crime_after97 as currently held in memory)
quietly regress rtphit3 i.pct90crime_after97 i.pct90crime i.sentyear if $pct90SampleDR, cluster(curoff1)
count if e(sample) & pct90crime & crimedate<td(1jan1998) & sentdate>=td(1jan1998) & actualreldate>=td(1sep2002)

***
*** Kuziemko & Ganong: Science table
***
* One table row per outcome, five columns: three grid-threshold IV estimates on the Kuziemko data,
* then one IV estimate per grid revision date on the data produced by PrepGanong.

outreg, clear(Results)
foreach depvar in rtphit rtpTechMisdPar rtpFelonyNonconvPar rtpTechMisdProb rtpFelonyNonconvProb rtpFelonyConv {
	outreg, clear
	// summary statistics are attached only on the last outcome, so they appear once at the bottom of the stacked table
	global outregcmd outreg, keep(timeserv) se sdec(4) starloc(1) starlevel(10 5 1) `=cond("`depvar'"=="rtpFelonyConv","summstat(widstat \ N)","noautosumm")' merge rtitle("`depvar'") //nodisplay

	// data are reloaded on every pass because the PrepGanong calls below replace the data in memory
	PrepKuziemko, dataset(Kuziemko)
	keep if rtphit3<. & (($gridSample) | ($gridSampleDR))
	// outcome components formed as differences between return measures
	gen rtpTechMisdPar3      = rtpPar3    - rtpFelonyPar3
	gen rtpFelonyNonconvPar3 = rtpFelonyPar3 - rtpFelonyConvPar3
	gen rtpTechMisdProb3      = rtpProb3    - rtpFelonyProb3
	gen rtpFelonyNonconvProb3 = rtpFelonyProb3 - rtpFelonyConvProb3

	// 1983-era grid, cases rated from 1 May 1983 up to (not including) 1 Apr 1993; instrument i.sucgroup79
	ivreg2 `depvar'3 (timeserv = i.sucgroup79) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & ratedate>=td(1may1983) & ratedate<td(1apr1993) & sucscore>=1  & sucscore<=8 , cluster(clustDR) small
	$outregcmd ctitle("", "1983, 5 to 6")

	ivreg2 `depvar'3 (timeserv = i.sucgroup79) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & ratedate>=td(1may1983) & ratedate<td(1apr1993) & sucscore>=6  & sucscore<=10, cluster(clustDR) small
	$outregcmd ctitle("", "1983, 8 to 9")

	// 1993-era grid; instrument i.sucgroup93
	ivreg2 `depvar'3 (timeserv = i.sucgroup93) $controlsKuziemko i.sentlenRound i.actualrelyear i.severitylevel sucscore drugcrimeDR violcrimeDR propcrimeDR if $gridSampleDR & admitdate>=td(1jan1995) & sentdate>=td(1jan1995) & sentdate<td(1jan2006) & sucscore>=4  & sucscore<=13, cluster(clustDR) small
	$outregcmd ctitle("", "1993, 8 to 9")

	// one column per grid revision date
	// NOTE(review): PrepGanong is not defined in this part of the file; it is expected to load a dataset with the
	// variables used below -- confirm against its definition
	foreach RevisionDate in 1May1983 1Apr1993 {
		PrepGanong `RevisionDate' if !duplicateGanong & min(ratedate,actualreldate)>=td(`RevisionDate')-365*4
		// keep cases rated from 366 days before to 365 days after the revision date (upper bound exclusive)
		keep if ratedate >= td(`RevisionDate')-366 & ratedate < td(`RevisionDate')+365 & validDates & servdaysprsnjail>=90 & servdaysprsnjail<=10*365 & obsIsFirstratedate
		replace timeserv = timeserv * 12 // convert to months
		gen rtpTechMisdPar3      = rtpPar3    - rtpFelonyPar3
		gen rtpFelonyNonconvPar3 = rtpFelonyPar3 - rtpFelonyConvPar3
		gen rtpTechMisdProb3      = rtpProb3    - rtpFelonyProb3
		gen rtpFelonyNonconvProb3 = rtpFelonyProb3 - rtpFelonyConvProb3

		// time served instrumented by parmonths, conditioning on severity-by-score cell dummies
		ivreg2 `depvar'3 (timeserv = parmonths) i.severitylevel#i.sucscore, cluster(cellCluster) small
		$outregcmd ctitle("", "`RevisionDate' revision")
	}
	outreg, append replay(Results) store(Results)
}
outreg using "Kuziemko & Ganong Science", replace coljust(l{c}c) replay(Results)